
/*

 THIS .DO FILE PREPARES THE DATA IN THE FORMAT AS USED IN THE VARIANCE DECOMPOSITION BY DI GIOVANNI, LEVCHENKO AND MEJEAN (ECMA2014)
 
 - REQUIRE THE DATA TO CONTAIN:
   - FIRM-DESTINATION LEVEL REVENUE (BY YEAR)
     - "HOME" IS DESTINATION "USA" (COUNTRY CODE: 1000) WHEREAS "FOREIGN" IS DECOMPOSED ACROSS DIFFERENT FOREIGN COUNTRIES
   - FIRM-LEVEL CHARACTERISTICS (BY YEAR)
   
*/

* Housekeeping
set more off
timer clear

* Working paths
global base "/directory"
global DoPth "$base/dofiles/"
global PthIn "$base/data/" 
global PthOut "$base/output/"

* Globals of program
global firsty 1994
global secondy 1995
global lasty 2011

*** COLLAPSING LFTTD DATA TO THE FIRM-DESTINATION LEVEL ***
forvalues y=$firsty(1)$lasty  {

	use "$PthIn/lfttd_`y'.dta", clear
	
	bys firmid country: egen sales_for = sum(sales)
	bys firmid country: gen d_firmidctry = _n
	keep if d_firmidctry==1
	
	bys firmid: egen revenue_foreign = sum(sales_for)
	
	keep firmid country iso* sales_for revenue_foreign
	
	save "$PthOut/lfttd_`y'_reduced.dta", replace

}

*** MERGING EXPORT DATA TO LBD-F DATA ***
forvalues y=$firsty(1)$lasty  {

	use "$PthIn/lbd_firm_rev_`y'.dta", clear
	
	merge 1:m using "$PthOut/lfttd_`y'_reduced.dta"
	drop if _merge==2
	drop _merge
	erase "$PthOut/lfttd_`y'_reduced.dta"
	
	gen revenue_home = revenue_total - revenue_foreign
	
	sort firmid country
	
	save "$PthOut/firm_dest_LBD_rev_`y'.dta", replace
	
}

*** RESHAPING OF LBD-LFTTD MATCHED FIRM-DESTINATION LEVEL DATA *** 
forvalues y=$firsty(1)$lasty  {

	* Create a firm-specific row with revenue from US source - "revenue_home"
	use "$PthOut/firm_dest_LBD_rev_`y'.dta", clear
	
	keep firmid estabs emp pay firmage revenue* sales_for country iso* merge_lfttd state county zip sic mfsic* bestsic bestnaics mnaics* industry*
	
	bys firmid: gen d_firmid = _n
	keep if d_firmid==1
	drop d_firmid
	
	replace sales_for = revenue_home
	replace country="1000" if revenue_home!=.
	*replace iso="US" if revenue_home!=.
	
	* Create "openness" (i.e., fraction of revenue that is exported) variable - yearly
	gen openness = revenue_foreign/revenue_total
	
	gen d_temp = 1
	
	save "$PthOut/temp/temp`y'.dta", replace
	
	* Create firm-destination specific revenue across home (US) and foreign countries (by year)
	use "$PthOut/firm_dest_LBD_rev_`y'.dta", clear

	keep firmid estabs emp pay firmage revenue* sales_for country iso* merge_lfttd state county zip sic mfsic* bestsic bestnaics mnaics* industry*
	
	append using "$PthOut/temp/temp`y'.dta"
	
	sort firmid country
	
	* Note:
	*	- Appending procedure creates duplicates for non-exporters
	*	- If "merge_lfttd==." then foreign revenue must be zero by construction
	*	- Drop these duplicates
	drop if merge_lfttd==. & d_temp==.
	drop d_temp
	drop revenue_total
	
	ren sales_for revctry
	gen year = `y'
	
	save "$PthOut/temp/firm_dest_LBD_rev2_`y'.dta", replace
	erase "$PthOut/temp/temp`y'.dta"
}

* Append yearly files together to create "master" file (at firm-destination level)
use "$PthOut/temp/firm_dest_LBD_rev2_$firsty.dta", clear
forvalues y=$secondy(1)$lasty  {

	append using "$PthOut/temp/firm_dest_LBD_rev2_`y'.dta"
	erase "$PthOut/temp/firm_dest_LBD_rev2_`y'.dta"
	
	if `y'==$lasty  {
	
		sort firmid country year
		save "$PthOut/firm_dest_LBD_rev_master.dta", replace
		
	}

}
erase "$PthOut/temp/firm_dest_LBD_rev2_$firsty.dta"

*** Add additional variables needed for Di Giovanni, Levchenko and Mejean (2014) variance decomposition
use "$PthOut/firm_dest_LBD_rev_master.dta", clear

sort firmid country year

* Observations with inconsistent total sales and exports
*	- Majority of these specific observations have no reported total sales (i.e., missing in LBD-SSL) but some reported exports
*	- Currentlym we will drop the "home" sales for these firms through the dummy "neg_rev" as export sales are still informative
gen neg_rev = 0
replace neg_rev = 1 if revctry<0 & country=="1000"
drop if neg_rev==1
drop neg_rev

* Growth rates (at firm-destination level)
bys firmid country: gen gr_revctry = ln(revctry[_n]/revctry[_n-1]) if year[_n]==year[_n-1]+1
bys firmid country: gen gr_revctry2 = (revctry[_n]/revctry[_n-1])-1 if year[_n]==year[_n-1]+1
bys firmid country: gen gr_revctryDHS = (revctry[_n]-revctry[_n-1])/(0.5*(revctry[_n]+revctry[_n-1])) if year[_n]==year[_n-1]+1

* Growth outliers
gen out_gr = 1 if (gr_revctry>$grup | gr_revctry<$grdown) & gr_revctry!=.
gen out_grDHS = 1 if (gr_revctryDHS>$grup | gr_revctryDHS<$grdown) & gr_revctryDHS!=.

* Rename "firmage" variable
ren firmage age

* Lagged sales (for construction of weights)
bys firmid country: gen lag_revctry = revctry[_n-1]

* Lagged sales - missing values substituted to be equal to zero
gen lag_revctry_c = lag_revctry
replace lag_revctry_c = 0 if lag_revctry==.

* Harmonization of industry (SIC2/SIC3) codes across time
*	- This should not be that important as aggregate, industry, local and firm-level components are estimated year by year
*	- Alternatively, NAICS codes can be harmonized

save "$PthOut/firm_dest_LBD_rev_master.dta", replace

